## Warning: Column `species` joining character vector and factor, coercing
## into character vector
underscore_to_space <- partial(str_replace_all, pattern = "_", replacement = " ")
oldest_node_tree <- trimmed_species_tree_as_df %>% pull(x) %>% max()
# labels for plot axis
max_msa_index <- kappa_caseins_alignment_positions %>% pull(msa_index) %>% max()
align_breaks <- c(
1, # min
max_msa_index, # max
seq(100, max_msa_index, 100) # intermediate positions
)
align_breaks_labels <- c(
1, # min
max_msa_index, # max
seq(100, max_msa_index, 100) # intermediate positions
) %>% as.character()
align_minor_breaks <- c(1, max_msa_index, seq(25, max_msa_index, 25))
save_plot_pdf <- partial(
ggsave,
path = plot_output_dir,
device = cairo_pdf,
units = "in"
)
save_plot_jpg <- partial(
ggsave,
path = plot_output_dir,
dpi = "retina",
units = "in"
)
save_plot <- function(plot, name, ...) {
suppressMessages(
save_plot_pdf(plot = plot, filename = glue("{name}.pdf"), ...)
)
suppressMessages(
save_plot_jpg(plot = plot, filename = glue("{name}.jpg"), ...)
)
invisible(plot)
}
save_table <- function(table, name, ...) {
write(x = table, file = file.path(table_output_dir, glue("{name}.tex")), ...)
invisible(plot)
}
Custom amino acid colours based on the colourblind friendly OkabeIto colour palette (see http://jfly.iam.u-tokyo.ac.jp/color/ and https://github.com/clauswilke/colorblindr)
aa_custom_OkabeIto_colours <- c(
"cysteine" = "#E69F00", # orange
"histidine" = "#56B4E9", # light blue
"charged +" = "#0072B2", # marine blue
"charged -" = "#D55E00", # red
"aromatic" = "#CC79A7", # pink
"proline" = "#F0E442", # yellow
"polar" = "#009E73", # green
"apolar" = "#999999", # light grey
"glycine" = "#999999", # light grey
"gap" = "#ffffff" # white
)
Kappa-casein parts colours
colour_parts_kappa_casein <- c(
"PKC" = "#e69f00", # orange
"GMP" = "#56b4e9" # blue
)
# scales and theme ----
scale_x_align_residue_plot <- scale_x_continuous(
expand = c(0, 0),
breaks = align_breaks,
labels = align_breaks_labels,
minor_breaks = align_minor_breaks
)
scale_y_align_residue_plot <- scale_y_continuous(
expand = c(0, 0),
breaks = c(0, 0.5, 1)
)
# base plots for figures and some supplementary figures
plot_kappa_caseins_parts <- kappa_caseins_alignment_positions_parts %>%
ggplot(mapping = aes(x = msa_index, y = species, fill = part)) +
geom_tile(size = 0) + # 0 for no border
scale_fill_manual(values = c(colour_parts_kappa_casein), na.value = "white") +
scale_x_align_residue_plot +
scale_y_discrete(
position = "right",
labels = underscore_to_space,
expand = c(0, 0)
) +
theme_bw() +
theme(
text = element_text(colour = "black", family = "Arial"),
axis.text = element_text(colour = "black", family = "Arial"),
axis.ticks.y = element_line(colour = "black"),
axis.ticks.x = element_line(colour = "black"),
legend.title = element_blank(),
legend.position = c(0, 1),
legend.justification = c(0, 1),
legend.background = element_rect(fill = "transparent"),
legend.box.background = element_rect(colour = "black", fill = "white", size = 0.2),
legend.spacing = unit(1, "mm"),
legend.key = element_blank(),
legend.box.spacing = unit(0.01, "line"),
legend.key.size = unit(5, "pt"),
legend.text = element_text(margin = margin(0, 0, 0, 2), size = 8),
legend.margin = margin(1, 1, 1, 1),
axis.ticks = element_line(size = 0.2),
axis.text.y = element_text(size = 7, face = "italic"),
axis.title.x = element_text(size = 8),
axis.text.x = element_text(size = 7),
panel.background = element_rect(fill = "white", colour = "white"),
plot.background = element_rect(fill = "white", colour = "white"),
panel.border = element_rect(fill = "transparent", colour = "black", size = 0.2),
axis.title.y = element_blank(),
plot.title = element_text(size = 9),
plot.subtitle = element_blank(),
panel.grid.major.x = element_line(size = 0.1),
panel.grid.major.y = element_line(size = 0.1),
panel.grid.minor.x = element_blank()
) +
labs(x = "aligned amino acid position") +
NULL
# equal expand values for the y axis to assure the 2 plots align well
indels_kappacasein_parts_plot_expand <- c(0.005, 0.0)
# species tree on the left
plot_kappa_casein_tree <- ggplot(trimmed_species_tree, size = 0.5) +
geom_tree(size = 0.2) +
geom_treescale(x = 10, y = 40, fontsize = 2.5, linesize = 0.4, offset = 1) +
theme_bw() +
theme(
text = element_text(colour = "black", family = "Arial"),,
plot.margin = margin(0, 0, 0, 0),
panel.spacing = margin(0, 5, 5, 5),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
) +
scale_y_continuous(expand = indels_kappacasein_parts_plot_expand) +
scale_x_continuous(expand = c(0, 0)) +
xlim_tree(167)
# custom geom to be used for annotation with consistent style
geom_cladelabel_custom <- partial(
geom_cladelabel,
fontsize = 2.4,
offset = 0.5,
offset.text = 0.1,
family = "arial",
barsize = 0.4
)
# geological time periods
# package geoscale
# Gradstein, F. M., Ogg, J. M., and Schmitz, M., 2012, A geologic time scale, Boston, USA, Elsevier.
data(timescales)
mammals_periods <- timescales %>%
purrr::pluck("ICS2015") %>%
filter(End < oldest_node_tree, !is.na(Part_of)) %>%
rename(geologic.period = Part_of) %>%
group_by(geologic.period) %>%
summarise(start = max(Start), end = min(End)) %>%
ungroup() %>%
mutate(geologic.period = fct_reorder(geologic.period, end)) %>%
arrange(desc(geologic.period)) %>%
identity()
mammals_periods_breaks <- c(mammals_periods$start) %>% round(1)
# Panel A - maximum likelihood protein tree
plot_fitted_tree <- fitted_tree %>%
ggtree(size = 0.1, color = "black") +
xlim_tree(3.4) +
geom_treescale(
x = 0.1,
y = 30,
offset = 2,
fontsize = 2.5,
linesize = 0.4,
width = 0.1
) +
geom_tiplab(
size = 0.7,
mapping = aes(label = underscore_to_space(label)),
family = "arial",
fontface = "italic"
) +
scale_x_continuous(expand = c(0.02, 0)) +
scale_y_continuous(expand = c(0.02, 0)) +
geom_cladelabel_custom(node = 101, label = "Monotremes") +
geom_cladelabel_custom(node = 196, label = "Marsupials") +
geom_cladelabel_custom(node = 194, label = "Afrotheria") +
geom_cladelabel_custom(node = 106, label = "Glires") +
geom_cladelabel_custom(node = 186, label = "Bats") +
geom_cladelabel_custom(node = 139, label = "New world monkeys") +
geom_cladelabel_custom(node = 126, label = "Old world monkeys") +
geom_cladelabel_custom(node = 134, label = "Apes") +
geom_cladelabel_custom(node = 175, label = "Carnivorans") +
geom_cladelabel_custom(node = 149, label = "Ruminantia") +
geom_cladelabel_custom(node = 164, label = "Cetaceans") +
geom_cladelabel_custom(node = 169, label = "Camelids") +
geom_cladelabel_custom(node = 172, label = "Odd-toed ungulates") +
labs(
tag = "A"
) +
NULL
# Panel B- scatter plot
plot_compare_evodist_divtime <- compare_evodist_divtime %>%
filter(corrected == TRUE) %>%
ggplot(
mapping = aes(
x = divergence.time,
y = distance.fit,
label = glue("{species.1}-{species.2}") # dev only
)
) +
geom_vline( # separates geological periods
xintercept = mammals_periods_breaks,
colour = "darkgrey",
linetype = "dotted", size = 0.5
) +
geom_point( # couples of species
size = 0.7,
alpha = 0.1,
colour = "black"
) +
geom_text( # annotate geological periods at the top
data = mammals_periods %>% filter(geologic.period != "Quaternary"),
mapping = aes(label = geologic.period, x = start + (end - start) / 2),
y = Inf,
family = "arial",
vjust = 1.4,
size = 2.5
) +
scale_x_continuous(
limits = c(0, max(mammals_periods_breaks)),
minor_breaks = seq(0, 200, 10),
sec.axis = sec_axis(~., breaks = mammals_periods_breaks),
expand = c(0.01, 0)
) +
labs(
x = "Divergence Time (Mya)",
y = "Pairwise distance",
tag = "B"
) +
theme_bw() +
theme(
legend.position = "none",
axis.text = element_text(size = 7),
axis.title = element_text(size = 9),
axis.ticks.x = element_line(size = 0.2),
panel.border = element_rect(fill = NA, colour = "black", size = 0.4)
) +
NULL
(plot_compare_evodist_divtime_combined <- plot_fitted_tree +
plot_compare_evodist_divtime +
plot_layout(widths = c(2.8, 5.2)) &
theme(
text = element_text(colour = "black", family = "Arial", face = "plain")
)
)
save_plot(
plot = plot_compare_evodist_divtime_combined,
name = "figure_1"
)
plot_indels_kappacasein_parts <- plot_kappa_caseins_parts +
geom_rect( # PTRs annotations
data = df_plot_indels,
mapping = aes(
ymin = as.numeric(species) - 0.4,
ymax = as.numeric(species) + 0.4,
xmin = N_flank_position - 1,
xmax = C_flank_position,
colour = type
),
inherit.aes = FALSE,
alpha = 0.0,
size = 0.25
) +
scale_colour_manual(values = c("tandem repeat" = "black")) +
theme(
text = element_text(colour = "black", family = "Arial"),
legend.position = c(0, 1),
legend.justification = c(0, 1),
axis.title.y = element_blank(),
axis.line.y = element_blank(),
axis.text.y = element_text(size = 6, face = "italic", colour = "black"),
axis.ticks = element_line(size = 0.2),
legend.title = element_blank(),
legend.text = element_text(margin = margin(0, 0, 0, 2), size = 9),
plot.margin = margin(0, 0, 0, 0),
panel.spacing = margin(0, 5, 5, 0),
legend.background = element_rect(fill = "transparent"),
legend.box.background = element_rect(
colour = "black",
fill = "white",
size = 0.1
),
legend.spacing = unit(1, "mm"),
legend.key = element_blank(),
panel.background = element_rect(fill = "white", colour = "white"),
plot.background = element_rect(fill = "white", colour = "white"),
legend.box.spacing = unit(0.01, "line"),
legend.key.height = unit(0.5, "lines"),
legend.key.width = unit(0.5, "lines"),
panel.border = element_rect(
fill = "transparent",
colour = "black",
size = 0.2
)
) +
labs(
# x = "distance from the PKC/GMP cleavage site"
)
(plot_combined_tree_lengths <- plot_kappa_casein_tree +
plot_indels_kappacasein_parts +
plot_layout(widths = c(0.5, 2), ncol = 2)
)
save_plot(
plot = plot_combined_tree_lengths,
name = "figure_2"
)
group_text_aa <- tribble(
~x, ~label,
2.5, "polar non charged\n",
7, "hydrophobic\n",
11, "aromatic\n",
14, "charged\n+",
16.5, "charged\n—",
19, "other\n",
)
(plot_aa_comp_range <- kappa_caseins_aa_comp_df_plot %>%
ggplot(mapping = aes(
ymin = min,
x = as.numeric(residues),
ymax = max,
colour = part
)) +
geom_errorbar(size = 0.9, position = position_dodge(width = 0.8)) +
geom_vline(xintercept = c(4.5, 9.5, 12.5, 15.5, 17.5), colour = "darkgrey", size = 0.05) +
geom_text(data = group_text_aa, mapping = aes(x = x, label = label), size = 3, y = Inf, vjust = 1.5, inherit.aes = F, lineheight = 0.8, color = "black") +
theme_minimal() +
scale_x_continuous(breaks = 1:20, labels = list_residues_ordered_aa_comp, sec.axis = dup_axis(labels = function(x) {one_2_three_aas[list_residues_ordered_aa_comp[x]]}), expand = c(0.01, 0)) +
scale_colour_manual(values = c(colour_parts_kappa_casein)) +
scale_y_continuous(expand = c(0.0, 0), sec.axis = dup_axis(), limits = c(NA, 0.34)) +
labs(
y = "frequency",
x = "amino acid"
) +
theme(
text = element_text(colour = "black", family = "Arial"),
axis.text = element_text(colour = "black", family = "Arial"),
axis.ticks = element_line(size = 0.1),
axis.text.x.top = element_text(angle = 45, hjust = 0),
axis.title.x.top = element_blank(),
axis.title.y.right = element_blank(),
panel.grid = element_blank(),
panel.grid.major = element_blank(),
panel.grid.major.x = element_line(size = 0.1, colour = "grey", linetype = "dotted"),
panel.grid.minor.x = element_blank(),
strip.text = element_text(hjust = 0, size = 10),
panel.spacing.x = unit(1, "lines"),
panel.border = element_rect(colour = "black", size = 0.9, fill = "transparent"),
legend.position = c(0, -0.07),
legend.justification = c(0, 1),
legend.background = element_rect(colour = "white", fill = "white", size = NA),
legend.direction = "horizontal",
legend.box.margin = margin(0, 0, 0, 0),
legend.margin = margin(0, 0, 0, 0),
legend.box.spacing = unit(0.0, "lines"),
plot.margin = margin(1, 1, 1, 1),
legend.title = element_blank()
) +
NULL
)
save_plot(
plot = plot_aa_comp_range,
name = "figure_3"
)
# names and labels ----
label_other_residues <- "other"
label_glutamine <- "glutamine (Q)"
label_asparagine <- "asparagine (N)"
label_tyrosine <- "tyrosine (Y)"
label_other_aromatic <- "phenylalanine (F) or tryptophan (W)"
label_charged_minus <- "negatively charged (D or E)"
label_charged_plus <- "positively charged (R, K or H)"
label_pSer <- "phosphorylated serine (pS)"
label_Ser <- "Serine (S)"
label_Ch <- "goat" # Capra hircus
label_Bt <- "cow" # Bos taurus
# other residues diplayed in grey ----
colour_other_residues <- c()
colour_other_residues[label_other_residues] <- "#e7e7e7"
# colours for each plot ----
colours_asngln <- colour_other_residues
colours_asngln[label_glutamine] <- "#002016"
colours_asngln[label_asparagine] <- "#009E73"
colours_aromatics <- colour_other_residues
colours_aromatics[label_other_aromatic] <- "#CC79A7"
colours_aromatics[label_tyrosine] <- "#461b32"
colours_charged <- colour_other_residues
colours_charged[label_charged_minus] <- "#e41a1c"
colours_charged[label_charged_plus] <- "#377eb8"
colours_phosphorylations <- colour_other_residues
colours_phosphorylations[label_pSer] <- "#247a62"
colours_phosphorylations[label_Ser] <- "#8da0cb"
colours_known_pSer <- c()
colours_known_pSer[label_Bt] <- "#a65628"
colours_known_pSer[label_Ch] <- "#4daf4a"
# vertical line at PKC/GMP cleavage site
vline_cleavage <- geom_vline(
xintercept = position_align_chymosin_cleavage_site + 0.5,
colour = "black",
size = 0.2
)
# weight residues according to species tree ----
fct_weight_conservation <- . %>%
select(-seq_index, -residue) %>%
left_join(kappa_caseins_phylo_tree_weights, by = "species") %>%
group_by(msa_index, class) %>%
summarise(
n_w = sum(weight_norm)
) %>%
ungroup()
# hydrophobicity legend ----
kd_range_max <- max(KyteDoolittle_scale_df$hydrophobicity)
kd_range_min <- min(KyteDoolittle_scale_df$hydrophobicity)
hydrophobicity_breaks <- c(kd_range_min, 2, 0, -2, kd_range_max)
# known phosphorylated serines ----
# from Uniprot
# goat: https://www.uniprot.org/uniprot/P02670#ptm_processing
# cow: https://www.uniprot.org/uniprot/P02668#ptm_processing
known_pSer_site_df <- tribble(
~msa_index, ~label, ~y,
355, label_Ch, Inf,
294, label_Bt, 0.91,
294, label_Ch, Inf,
179, label_Bt, 0.91,
179, label_Ch, Inf
)
# Panel A - glutamine and asparagine ----
# dataset
df_plot_asngln <- kappa_caseins_alignment_positions %>%
filter(residue != "-") %>%
mutate(
class = case_when(
residue == "Q" ~ label_glutamine,
residue == "N" ~ label_asparagine,
TRUE ~ label_other_residues
) %>%
fct_relevel(label_other_residues, label_asparagine, label_glutamine) # order colours for plot
) %>%
fct_weight_conservation()
# plot
plot_alignment_asngln <- df_plot_asngln %>%
ggplot(mapping = aes(y = n_w, x = msa_index, fill = class)) +
geom_bar(stat = "identity", colour = NA) +
scale_fill_manual(
values = colours_asngln,
guide = guide_legend(reverse = TRUE)
) +
labs(fill = "residue") +
NULL
# Panel B - tyrosines and aromatics ----
# dataset
df_plot_aromatics <- kappa_caseins_alignment_positions %>%
filter(residue != "-") %>%
mutate(
class = case_when(
residue == "Y" ~ label_tyrosine,
residue %in% c("F", "W") ~ label_other_aromatic,
TRUE ~ label_other_residues
) %>%
fct_relevel(label_other_residues, label_other_aromatic, label_tyrosine) # order colours for plot
) %>%
fct_weight_conservation()
# plot
plot_alignment_aromatics <- df_plot_aromatics %>%
ggplot(mapping = aes(y = n_w, x = msa_index, fill = class)) +
geom_bar(stat = "identity", colour = NA) +
scale_fill_manual(
values = colours_aromatics,
guide = guide_legend(reverse = TRUE)
) +
labs(fill = "residue") +
NULL
# Panel C - charged residues ----
# dataset
df_plot_charged <- kappa_caseins_alignment_positions %>%
filter(residue != "-") %>%
mutate(
class = case_when(
residue %in% c("R", "K", "H") ~ label_charged_plus,
residue %in% c("E", "D") ~ label_charged_minus,
TRUE ~ label_other_residues
) %>%
fct_relevel(label_other_residues, label_charged_plus, label_charged_minus) # order colours for plot
) %>%
fct_weight_conservation()
# plot
plot_alignment_charged <- df_plot_charged %>%
ggplot(mapping = aes(y = n_w, x = msa_index, fill = class)) +
geom_bar(stat = "identity", colour = NA) +
scale_fill_manual(
values = colours_charged,
guide = guide_legend(reverse = TRUE)
) +
labs(fill = "residue") +
NULL
# Panel D - predicted disorder ----
# dataset
df_plot_disorder <- kappa_casein_disorder_align %>%
filter(!is.na(seq_index)) %>%
mutate(
class = cut(iupred_short, breaks = seq(0, 1, 0.2))
) %>%
fct_weight_conservation()
# plot
plot_alignment_disorder <- df_plot_disorder %>%
ggplot(mapping = aes(y = n_w, x = msa_index, fill = class)) +
geom_bar(stat = "identity", colour = NA) +
scale_fill_brewer(palette = "Greens") +
labs(fill = "predicted disorder") +
NULL
# Panel E - hydrophobicity ----
# dataset
df_plot_hydrophobicity <- kappa_caseins_hydrophobicity %>%
filter(residue != "-") %>%
rename(class = hydrophobicity) %>%
fct_weight_conservation()
# plot
plot_alignment_hydrophobicity <- df_plot_hydrophobicity %>%
ggplot(mapping = aes(y = n_w, x = msa_index, fill = class)) +
geom_bar(stat = "identity", colour = NA) +
scale_fill_distiller(
palette = "BrBG",
limits = c(kd_range_min, kd_range_max),
direction = 1,
breaks = sort(hydrophobicity_breaks),
guide = guide_legend(barwidth = 10, barheight = 0.4, label.theme = element_text(size = 6))
) +
labs(fill = "hydrophobicity") +
NULL
# Panel F - phosphorylations ----
# dataset
df_plot_phosphorylations <- kappa_casein_fam20c_matches_tidy %>%
mutate(match = TRUE) %>%
rename(seq_index = match_position) %>%
full_join(kappa_caseins_alignment_positions, by = c("species", "seq_index")) %>%
mutate(match = !is.na(match)) %>%
filter(residue != "-") %>%
mutate(
class = case_when(
match == TRUE ~ label_pSer,
residue == "S" ~ label_Ser,
TRUE ~ label_other_residues
) %>%
fct_relevel(label_other_residues, label_Ser, label_pSer) # order colours for plot
) %>%
fct_weight_conservation()
# plot
plot_alignment_phosphorylations <- df_plot_phosphorylations %>%
ggplot(mapping = aes(y = n_w, x = msa_index, fill = class)) +
geom_bar(stat = "identity", colour = NA) +
geom_text(data = known_pSer_site_df, mapping = aes(x = msa_index, colour = label, y = y), vjust = 1, label = "▼", inherit.aes = FALSE, size = 3) +
scale_colour_manual(values = colours_known_pSer) +
scale_fill_manual(
values = colours_phosphorylations,
guide = guide_legend(reverse = TRUE)
) +
labs(
fill = "residue",
colour = "known phosphorylation sites"
) +
guides(fill = guide_legend(order = 1, reverse = TRUE)) +
NULL
# combine all plots ----
(plot_alignment_combined <- {
(plot_alignment_asngln + labs(tag = "A")) /
(plot_alignment_aromatics + labs(tag = "B")) /
(plot_alignment_charged + labs(tag = "C")) /
(plot_alignment_disorder + labs(tag = "D")) /
(plot_alignment_hydrophobicity + labs(tag = "E")) /
(plot_alignment_phosphorylations + labs(tag = "F"))
} + plot_layout(ncol = 1) &
vline_cleavage & # following scales, themes and labs applied to all subplots
scale_x_align_residue_plot &
scale_y_align_residue_plot &
theme_bw() &
theme(
text = element_text(family = "Arial", colour = "black"),
legend.position = "top",
legend.title = element_text(size = 7),
legend.text = element_text(size = 7),
legend.box.spacing = unit(0.01, "line"),
axis.text = element_text(size = 7, colour = "black"),
axis.title = element_text(size = 7),
panel.border = element_rect(size = 0.2),
axis.ticks = element_line(size = 0.2),
legend.key.width = unit(0.5, "lines"),
legend.key.height = unit(0.5, "lines"),
plot.tag.position = c(0, 1),
plot.tag = element_text(colour = "black", size = 11),
legend.justification = c(0, 0),
legend.spacing.x = unit(0.4, "lines"),
plot.margin = margin(0, 0, 0, 0),
panel.grid.major.x = element_line(size = 0.01),
panel.grid.minor.x = element_line(size = 0.01),
panel.grid.major.y = element_line(size = 0.01),
panel.grid.minor.y = element_line(size = 0.01),
) &
labs(
x = "aligned amino acid position",
y = "weighted frequency"
)
)
save_plot(
plot = plot_alignment_combined,
name = "figure_4"
)
pH_breaks <- c(3, 4, 5, 6, 7)
pH_minor_breaks <- seq(1.5, 7.5, 0.5)
ncpr_highlighted_species <- c(
"Saimiri_boliviensis",
"Cavia_porcellus",
"Homo_sapiens",
"Mus_musculus",
"Bos_taurus"
)
geom_line_highlight_species <- partial(geom_line, mapping = aes(colour = species), size = 0.3)
# panel A
plot_ncpr_change_mature <- kappa_caseins_ncpr %>%
filter(part == "mature") %>%
ggplot(mapping = aes(x = pH, y = net_charge_per_residue, group = species)) +
geom_hline(yintercept = 0, size = 0.1, linetype = "dotted") +
geom_line(size = 0.1, colour = "lightgray") +
facet_wrap(~part) +
scale_y_continuous(
limits = c(-0.21, 0.21)
) +
scale_x_continuous(
breaks = pH_breaks,
minor_breaks = pH_minor_breaks,
expand = c(0, 0)
) +
scale_color_brewer(
palette = "Set1",
labels = underscore_to_space,
guide = guide_legend(
nrow = 2,
title.hjust = 0,
label.hjust = 0,
title.vjust = 1,
label.vjust = 0.5,
byrow = TRUE,
direction = "horizontal"
)
) +
labs(
y = "net charge per residue",
x = "pH"
) +
theme_bw() +
theme(
legend.title = element_blank(),
legend.position = "none",
panel.background = element_rect(size = 0.2, colour = "black"),
panel.grid.major = element_line(size = 0.2),
plot.tag.position = c(0, 1),
strip.background = element_blank(),
panel.spacing = unit(1.2, "line"),
axis.text = element_text(size = 9, colour = "black"),
axis.title = element_text(size = 10, colour = "black"),
axis.ticks = element_line(size = 0.2, colour = "black")
) +
NULL
# panel B
# same plot, different data
plot_ncpr_change_parts <- plot_ncpr_change_mature %+%
(kappa_caseins_ncpr %>% filter(part != "mature")) &
theme(
legend.margin = margin(0, 0, 0, 0),
legend.text.align = 0,
legend.text = element_text(face = "italic", size = 9),
legend.key.size = unit(10, "pt"),
legend.position = "bottom",
legend.box.spacing = unit(0.05, "line"),
legend.justification = c(1, 1),
)
# combination of the 2 plots
# highlight species of interests
(plot_charge_combined <- (
plot_ncpr_change_mature +
geom_line_highlight_species(
data = subset(
kappa_caseins_ncpr,
part == "mature" &
species %in% ncpr_highlighted_species
)
) +
labs(tag = "A")
) +
(
plot_ncpr_change_parts +
geom_line_highlight_species(
data = subset(
kappa_caseins_ncpr,
part != "mature" &
species %in% ncpr_highlighted_species
)
) +
labs(tag = "B")
) +
plot_layout(widths = c(1, 2)) &
theme(text = element_text(colour = "black", family = "Arial"))
)
save_plot(
plot = plot_charge_combined,
name = "figure_5"
)
# merge predicted count phosphorylations/o-glycosylations
df_phospho_glyco_counts <- bind_rows(
phosphorylation = kappa_casein_fam20c_counts,
Oglycosylation = kappa_casein_oglyc_glycomine_counts,
.id = "PTM"
) %>%
order_species_tree() %>%
order_cleavage_parts() %>%
mutate(n = if_else(part == "PKC", -n, n)) # uses negative values for PKC
# plot phosphorylations
plot_phospho_counts <- df_phospho_glyco_counts %>%
filter(PTM == "phosphorylation") %>%
ggplot(mapping = aes(x = n, y = species, fill = part)) +
geom_barh(stat = "identity") +
scale_fill_manual(values = c(colour_parts_kappa_casein)) +
scale_y_discrete(expand = c(0.005, 0.0), position = NULL) +
scale_x_continuous(
expand = c(0.005, 0),
limits = c(-8, 8),
breaks = seq(-8, 8, 2),
minor_breaks = seq(-8, 8, 1),
labels = c(seq(8, 2, -2), 0, seq(2, 8, 2)),
sec.axis = dup_axis()
) +
theme_bw() +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = c(0, 1),
legend.justification = c(0, 1),
) +
labs(
x = "count of predicted phosphorylations"
) +
NULL
# plot glycosylations
plot_glyco_counts <- df_phospho_glyco_counts %>%
filter(PTM == "Oglycosylation") %>%
ggplot(mapping = aes(x = n, y = species, fill = part)) +
geom_barh(stat = "identity") +
scale_fill_manual(values = c(colour_parts_kappa_casein)) +
scale_y_discrete(
expand = c(0.005, 0.0),
position = "right",
labels = underscore_to_space
) +
scale_x_continuous(
expand = c(0.005, 0),
limits = c(-20, 20),
breaks = seq(-20, 20, 5),
minor_breaks = seq(-20, 20, 1),
labels = c(seq(20, 5, -5), 0, seq(5, 20, 5)),
sec.axis = dup_axis()
) +
theme_bw() +
theme(
legend.position = "none",
axis.text.y = element_text(face = "italic", size = 6),
) +
labs(
x = "count of predicted O-glycosylations"
) +
NULL
(plot_phospho_glyco_counts <- {
plot_kappa_casein_tree + {
{
plot_phospho_counts + plot_glyco_counts
} &
theme(
text = element_text(colour = "black", family = "Arial"),
plot.margin = margin(0, 3, 0, 3),
axis.title.y = element_blank(),
axis.text.x = element_text(size = 7),
axis.ticks = element_line(size = 0.1, lineend = "butt"),
plot.title = element_blank(),
plot.subtitle = element_blank(),
axis.title.x = element_text(size = 8),
legend.text = element_text(size = 7),
legend.key.height = unit(0.9, "lines"),
legend.key.width = unit(0.9, "lines"),
legend.title = element_blank(),
legend.background = element_rect(fill = "transparent"),
legend.box.background = element_rect(colour = "black", fill = "white", size = 0.2),
legend.spacing = unit(1, "mm"),
legend.key = element_blank(),
legend.box.spacing = unit(0.01, "line"),
legend.margin = margin(1, 1, 1, 1),
panel.grid.major.x = element_line(size = 0.1),
panel.grid.major.y = element_line(size = 0.1),
panel.grid.minor.x = element_blank()
) &
geom_vline(xintercept = 0, size = 0.05, colour = "black")
}
} +
plot_layout(widths = c(0.7, 4))
)
save_plot(
plot = plot_phospho_glyco_counts,
name = "figure_6"
)
all_cysteines_positions_rel_cattle <- kappa_caseins_cysteines_positions_rel_cattle %>%
pull(cattle_position) %>%
unique()
SS_bonds_categories <- kappa_caseins_SS_bonds_classes %>%
pull(bond) %>%
levels()
# kappa-casein residues for every position with a cysteine in a species
plot_kappa_casein_cysteines <- kappa_caseins_cysteines_positions_rel_cattle %>%
mutate(position_id = group_indices(., cattle_position)) %>% # to simulate continuous axis
ggplot(mapping = aes(
x = position_id,
y = species,
label = residue,
fill = group
)) +
geom_tile(mapping = aes(alpha = group)) +
geom_text(family = "Source Code Pro", size = 1.8) +
scale_fill_manual("amino acid", values = aa_custom_OkabeIto_colours) +
scale_alpha_manual("amino acid", values = c("cysteine" = 1, set_names(rep(0.6, 19), c("histidine", "aromatic", "glycine", "proline", "polar", "apolar", "charged +", "charged -")))) +
theme_bw() +
theme(legend.position = "bottom") +
scale_x_continuous(
breaks = 1:12,
labels = all_cysteines_positions_rel_cattle,
expand = c(0, 0),
sec.axis = dup_axis()
) +
scale_y_discrete(
expand = c(0, 0)
) +
guides(fill = guide_legend(nrow = 9, ncol = 1)) +
theme(
text = element_text(colour = "black", family = "Arial"),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
legend.direction = "vertical",
legend.position = c(-0.3, 1),
legend.justification = c(0, 1),
legend.key.width = unit(0.25, "lines"),
legend.key.height = unit(0.25, "lines"),
legend.title = element_text(size = 6),
legend.text = element_text(size = 6),
legend.box.margin = margin(2, 2, 2, 2, unit = "pt"),
legend.margin = margin(2, 2, 2, 2, unit = "pt"),
legend.background = element_rect(fill = "transparent"),
legend.box.background = element_rect(
colour = "black",
fill = "white",
size = 0.1
),
legend.spacing = unit(0.5, "mm"),
axis.title.y = element_blank(),
axis.title.x = element_text(size = 7),
axis.text.x = element_text(size = 7),
axis.ticks = element_line(size = 0.2),
panel.border = element_rect(size = 0.05, colour = "black"),
panel.grid.major.x = element_blank(),
plot.margin = margin(0, 0, 0, 0),
panel.grid.major.y = element_line(size = 0.01)
) +
labs(
x = "Bovine mature kappa-casein residue position"
) +
NULL
# possibilities to form dimer/oligo/intrachain SS bonds
plot_SS_bonds <- kappa_caseins_SS_bonds_classes %>%
filter(possibility) %>%
arrange(bond) %>%
mutate(bond_id = group_indices(., bond)) %>% # to simulate continuous axis
ggplot(mapping = aes(x = bond_id, y = species)) +
geom_text(label = "\u2713", size = 4) + # draw a tick
geom_vline(xintercept = c(1.5, 2.5), size = 0.1, colour = "black") +
scale_y_discrete(
position = "right",
labels = underscore_to_space,
drop = FALSE
) +
scale_x_continuous(
breaks = 1:3,
labels = SS_bonds_categories,
sec.axis = dup_axis(),
expand = c(0.25, 0)
) +
theme_bw() +
theme(
legend.position = "none",
axis.text.y = element_text(size = 5, face = "italic", colour = "black"),
axis.title.x = element_text(size = 7),
panel.border = element_rect(size = 0.1, colour = "black"),
panel.grid.major.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(size = 7),
axis.ticks = element_line(size = 0.2),
plot.margin = margin(0, 0, 0, 0),
panel.grid.major.y = element_line(size = 0.01)
) +
labs(
x = "predicted possible type of bonds"
) +
NULL
(plot_combined_cysteines <- {
plot_kappa_casein_tree + plot_kappa_casein_cysteines + plot_SS_bonds &
theme(
plot.margin = margin(0, 0, 0, 0),
)
} +
plot_layout(widths = c(0.5, 2, 0.9)) +
NULL
)
save_plot(
plot = plot_combined_cysteines,
name = "figure_7"
)
# data sets
pepsin_positions <- kappa_caseins_cleavage_positions %>%
filter(peptidase == "pepsin") %>%
left_join(kappa_caseins_alignment_positions, by = c("species", "cleavage_position" = "seq_index")) %>%
order_species_tree()
plasmin_positions <- kappa_caseins_cleavage_positions %>%
filter(peptidase == "plasmin") %>%
left_join(kappa_caseins_alignment_positions, by = c("species", "cleavage_position" = "seq_index")) %>%
order_species_tree()
plot_kappa_casein_pepsin <- plot_kappa_caseins_parts +
geom_segment(
size = 0.1,
data = pepsin_positions,
mapping = aes(
x = msa_index + 0.5,
xend = msa_index + 0.5,
y = as.integer(species) - 0.5,
yend = as.integer(species) + 0.5,
colour = "predicted\ncleavage\nsite"
),
inherit.aes = FALSE
) +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = c(-0.45, 1)
) +
labs(title = "Pepsin") +
scale_colour_manual(values = c("predicted\ncleavage\nsite" = "black")) +
guides(fill = guide_legend(order = 1))
plot_kappa_casein_plasmin <- plot_kappa_caseins_parts +
geom_segment(
size = 0.1,
data = plasmin_positions,
mapping = aes(
x = msa_index + 0.5,
xend = msa_index + 0.5,
y = as.integer(species) - 0.5,
yend = as.integer(species) + 0.5
),
inherit.aes = FALSE
) +
theme(
legend.position = "none",
axis.text.y = element_text(colour = "black", size = 6)
) +
labs(title = "Plasmin")
(cleavage_sites_position_plot <- {
plot_kappa_casein_tree + {
plot_kappa_casein_pepsin +
plot_kappa_casein_plasmin &
theme(
text = element_text(colour = "black", family = "Arial"),
plot.margin = margin(0, 7, 0, 0),
legend.key.height = unit(0.9, "lines"),
legend.key.width = unit(0.9, "lines"),
)
}
} +
plot_layout(width = c(0.8, 4, 4))
)
## Warning in xList[i] <- valueList: number of items to replace is not a
## multiple of replacement length
save_plot(
plot = cleavage_sites_position_plot,
name = "figure_8"
)
## Warning in xList[i] <- valueList: number of items to replace is not a
## multiple of replacement length
## Warning in xList[i] <- valueList: number of items to replace is not a
## multiple of replacement length
# positions clades
clades_to_annotate <- tribble(
~node, ~clade,
101, "monotremes",
197, "marsupials",
103, "placentals"
) %>%
left_join(trimmed_species_tree_as_df, by = "node") %>%
select(clade, branch, y)
plot_kappa_casein_tree_clades <- plot_kappa_casein_tree +
geom_text(
data = clades_to_annotate,
mapping = aes(x = branch, y = y, label = clade),
vjust = -0.3,
size = 2.8,
hjust = 0.5
)
plot_msa <- kappa_caseins_alignment_positions %>%
filter(residue != "-") %>%
left_join(amino.acid.classifications %>%
filter(classification == "custom: charge and aromatic"),
by = c("residue" = "residue")
) %>%
ggplot(mapping = aes(x = msa_index, y = species, fill = class)) +
geom_tile(size = 0, alpha = 0.85) +
scale_colour_manual(values = c("tandem repeat" = "black")) +
scale_fill_manual(values = aa_custom_OkabeIto_colours) +
scale_x_continuous(
expand = c(0, 0),
breaks = align_breaks,
labels = align_breaks_labels,
minor_breaks = align_minor_breaks
) +
scale_y_discrete(
position = "right",
expand = c(0.005, 0.0),
labels = underscore_to_space
) +
labs(
x = "amino acid position in alignment"
) +
theme_bw() +
theme(
text = element_text(colour = "black", family = "Arial"),
legend.position = "none",
axis.title.y.right = element_blank(),
plot.margin = margin(0, 0, 0, 0),
panel.spacing = margin(0, 0, 0, 0),
axis.text.y = element_text(face = "italic", size = 7, colour = "black")
) +
NULL
(plot_tree_msa <- plot_kappa_casein_tree_clades +
plot_msa +
plot_layout(ncol = 2, nrow = 1, widths = c(0.2, 0.7)) +
NULL
)
save_plot(
plot = plot_tree_msa,
name = "figure_S1"
)
# same plot, different data
(plot_compare_evodist_divtime_nocorrect <- plot_compare_evodist_divtime %+%
(compare_evodist_divtime %>%
filter(corrected == FALSE)) &
theme(
text = element_text(colour = "black", family = "Arial"),
plot.tag = element_blank())
)
save_plot(
plot = plot_compare_evodist_divtime_nocorrect,
name = "figure_S2"
)
species_old_indels <- c(
"Homo_sapiens" = "human",
"Bos_taurus" = "cow",
"Tachyglossus_aculeatus" = "echidna",
"Ornithorhynchus_anatinus" = "platypus",
"Monodelphis_domestica" = "opossum",
"Trichosurus_vulpecula" = "possum"
)
# tree
tips_to_drop_old_indels <- trimmed_species_tree_as_df %>%
filter(isTip, !label %in% names(species_old_indels)) %>%
pull(node)
tree_old_indels <- trimmed_species_tree %>%
drop.tip(tips_to_drop_old_indels)
tree_old_indels_df <- tree_old_indels %>%
ggplot2::fortify()
plot_tree_old_indels <- tree_old_indels_df %>%
ggtree(size = 0.4) +
geom_treescale(x = 50, y = 2)
# "barplot"
aa_old_indels_df <- kappa_caseins_alignment_positions %>%
filter(species %in% names(species_old_indels)) %>%
arrange(msa_index) %>%
mutate(is_gap = residue == "-") %>% # remove positions with gaps in **all** selected species
group_by(msa_index) %>%
mutate(all_gaps = all(is_gap)) %>%
ungroup() %>%
filter(!all_gaps, !is_gap) %>%
mutate(new_index = group_indices(., msa_index)) %>%
select(-residue, -all_gaps) %>% # order plot according to the new tree
mutate_at("species", as.character) %>%
left_join(tree_old_indels_df, by = c("species" = "label")) %>%
mutate(species = fct_reorder(species, y)) %>%
select(species, new_index)
aa_old_indels_df_max_length <- max(aa_old_indels_df$new_index)
plot_bars_old_indels <- aa_old_indels_df %>%
ggplot(mapping = aes(x = new_index, y = species)) +
geom_tile(colour = "black", fill = "black") +
theme_bw() +
theme(
text = element_text(family = "Arial", colour = "black"),
axis.ticks = element_line(size = 0.2, colour = "black"),
axis.title.y = element_blank(),
axis.line.y = element_blank(),
axis.text.y = element_text(face = "italic", colour = "black"),
axis.title.x = element_text(colour = "black"),
axis.text.x = element_text(colour = "black"),
plot.margin = margin(0, 0, 0, 0),
panel.background = element_rect(fill = "white", colour = "white"),
plot.background = element_rect(fill = "white", colour = "white"),
legend.box.spacing = unit(0.01, "line"),
legend.key.size = unit(5, "pt"),
panel.border = element_blank(),
strip.background = element_blank(),
plot.subtitle = element_blank()
) +
labs(x = "amino acid position in alignment") +
scale_x_continuous(
expand = c(0.02, 0),
breaks = c(1, seq(50, 300, 50), aa_old_indels_df_max_length),
minor_breaks = seq(0, 300, 25)
) +
scale_y_discrete(position = "right", labels = underscore_to_space) +
NULL
(plot_tree_old_indels <- plot_tree_old_indels +
plot_bars_old_indels +
plot_layout(widths = c(0.25, 1))
)
save_plot(
plot = plot_tree_old_indels,
name = "figure_S3"
)
(aacomp_scatter_plot <- kappa_caseins_parts_aa_composition %>%
mutate(residues = fct_relevel(residues, list_residues_ordered_aa_comp)) %>%
ggplot(mapping = aes(x = GMP, y = PKC)) +
geom_abline(slope = 1, color = "darkgrey", size = 0.1) +
geom_point(alpha = 0.3, size = 0.3) +
facet_wrap(~residues, ncol = 5) +
theme_bw() +
theme(
text = element_text(colour = "black", family = "Arial"),
strip.background = element_rect(fill = "white", colour = "black", size = 0),
strip.text = element_text(size = 9, margin = margin(1, 1, 1, 1, unit = "pt")),
panel.border = element_rect(size = 0.1),
panel.spacing.x = unit(1, "lines"),
axis.text = element_text(colour = "black"),
axis.ticks = element_line(colour = "black", size = 0.1),
panel.grid = element_blank(),
panel.grid.major = element_line(
size = 0.1,
linetype = "dotted",
colour = "black"
),
panel.grid.minor = element_line(
size = 0.05,
linetype = "dotted",
colour = "darkgrey"
),
) +
scale_x_continuous(
limits = c(0, 0.31),
expand = c(0, 0),
breaks = seq(0, 0.4, 0.1)
) +
scale_y_continuous(
limits = c(0, 0.31),
expand = c(0, 0), breaks = seq(0, 0.4, 0.1)
) +
coord_equal() +
labs(
x = "amino acid frequency (GMP)",
y = "amino acid frequency (PKC)"
) +
NULL
)
save_plot(
plot = aacomp_scatter_plot,
name = "figure_S4"
)
# phosphoryaltion predictions
fam20c_dataset <- kappa_casein_fam20c_matches_tidy %>%
left_join(kappa_caseins_alignment_positions, by = c("species", "match_position" = "seq_index")) %>%
filter(!is.na(match_position))
# order_species_tree()
plot_kappa_casein_pred_phospho_positions <- plot_kappa_caseins_parts +
geom_segment(
size = 0.2,
data = fam20c_dataset,
mapping = aes(
x = msa_index,
xend = msa_index,
y = as.integer(species) - 0.5,
yend = as.integer(species) + 0.5,
colour = "predicted\nposition"
),
inherit.aes = FALSE
) +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
) +
labs(title = "Phosphorylations") +
scale_colour_manual(values = c("predicted\nposition" = "black")) +
NULL
# O-glycosylation predictions
oglyc_dataset <- kappa_casein_oglyc_predictions %>%
filter(Oglyc_pred) %>%
left_join(kappa_caseins_alignment_positions, by = c("species", "position" = "seq_index")) %>%
order_species_tree()
plot_kappa_casein_pred_oglyc_positions <- plot_kappa_caseins_parts +
geom_segment(
size = 0.2,
data = oglyc_dataset,
mapping = aes(
x = msa_index,
xend = msa_index,
y = as.integer(species) - 0.5,
yend = as.integer(species) + 0.5
),
inherit.aes = FALSE
) +
theme(
axis.text.y = element_text(colour = "black", family = "Arial"),
legend.position = "none"
) +
labs(title = "O-glycosylations") +
NULL
(plot_phospho_oglyco_positions <- plot_kappa_casein_tree + {
plot_kappa_casein_pred_phospho_positions +
plot_kappa_casein_pred_oglyc_positions &
theme(
text = element_text(colour = "black", family = "Arial"),
plot.margin = margin(0, 8, 0, 0)
)
} +
plot_layout(widths = c(0.8, 4, 4))
)
## Warning in xList[i] <- valueList: number of items to replace is not a
## multiple of replacement length
save_plot(
plot = plot_phospho_oglyco_positions,
name = "figure_S5"
)
## Warning in xList[i] <- valueList: number of items to replace is not a
## multiple of replacement length
## Warning in xList[i] <- valueList: number of items to replace is not a
## multiple of replacement length
cysteines_positions <- kappa_caseins_alignment_positions %>%
filter(residue == "C") %>%
select(-seq_index, -residue)
plot_kappa_casein_cysteines <- plot_kappa_caseins_parts +
geom_segment(
size = 0.2,
data = cysteines_positions,
mapping = aes(
x = msa_index,
xend = msa_index,
y = as.integer(species) - 0.5,
yend = as.integer(species) + 0.5,
colour = "cysteine"
),
inherit.aes = FALSE
) +
scale_colour_manual(values = c("cysteine" = "black")) +
theme(
legend.position = c(1, 1),
legend.justification = c(1, 1),
axis.text.y = element_text(colour = "black", family = "Arial"),
) +
# labs(title = "Cysteines") +
NULL
(cysteines_position_plot <- {
plot_kappa_casein_tree +
plot_kappa_casein_cysteines
} +
plot_layout(width = c(1, 4)) &
theme(
text = element_text(colour = "black", family = "Arial"),
plot.margin = margin(0, 0, 0, 0)
)
)
save_plot(
plot = cysteines_position_plot,
name = "figure_S6"
)
# formating for html report
kappa_casein_gi %>%
mutate_at("species", fct_relabel, underscore_to_space) %>%
kable(format = "html") %>%
kable_styling()
| species | taxid | gi |
|---|---|---|
| Tachyglossus aculeatus | 9261 | 255661244 |
| Ornithorhynchus anatinus | 9258 | 255661248 |
| Trichosurus vulpecula | 9337 | 4559294 |
| Monodelphis domestica | 13616 | 126330606 |
| Orycteropus afer | 1230840 | 634853626 |
| Trichechus manatus | 127582 | NA |
| Loxodonta africana | 9785 | 731494712 |
| Oryctolagus cuniculus | 9986 | 1607 |
| Ochotona princeps | 9978 | 504173483 |
| Marmota marmota | 9994 | 984137438 |
| Ictidomys tridecemlineatus | 43179 | 532095382 |
| Octodon degus | 10160 | 507713885 |
| Cavia porcellus | 10141 | 115670 |
| Heterocephalus glaber | 10181 | 351699116 |
| Fukomys damarensis | 885580 | 1104935430 |
| Castor canadensis | 51338 | 1147391913 |
| Dipodomys ordii | 10020 | 852759962 |
| Jaculus jaculus | 51337 | 507563375 |
| Nannospalax galili | 1026970 | 674032426 |
| Peromyscus maniculatus | 230844 | 1008789005 |
| Microtus ochrogaster | 79684 | 532033496 |
| Rattus norvegicus | 10116 | 13928762 |
| Mus pahari | 10093 | 1195508802 |
| Mus caroli | 10089 | 1195722227 |
| Mus musculus | 10090 | 75677412 |
| Otolemur garnettii | 30611 | 395857266 |
| Carlito syrichta | 1868482 | 640822240 |
| Aotus nancymaae | 37293 | 817316329 |
| Callithrix jacchus | 9483 | 1060981464 |
| Cebus capucinus | 1737458 | 1044411177 |
| Saimiri boliviensis | 39432 | 403280959 |
| Nomascus leucogenys | 61853 | 332233119 |
| Pongo abelii | 9601 | 297673660 |
| Gorilla gorilla | 9595 | 426344545 |
| Homo sapiens | 9606 | 29676 |
| Pan paniscus | 9597 | 397475232 |
| Pan troglodytes | 9598 | 114594392 |
| Colobus angolensis | 336983 | 795340768 |
| Rhinopithecus roxellana | 61622 | 724803799 |
| Chlorocebus sabaeus | 60711 | 635042818 |
| Papio anubis | 9555 | 402869633 |
| Cercocebus atys | 9531 | 795383260 |
| Mandrillus leucophaeus | 9568 | 795308736 |
| Macaca nemestrina | 9545 | 795623958 |
| Macaca mulatta | 9544 | 109074586 |
| Macaca fascicularis | 9541 | 355749359 |
| Condylura cristata | 143302 | 507942890 |
| Erinaceus europaeus | 9365 | 617605017 |
| Rousettus aegyptiacus | 9407 | 1012258563 |
| Pteropus alecto | 9402 | 586524827 |
| Pteropus vampyrus | 132908 | 759121339 |
| Rhinolophus sinicus | 89399 | 1124008234 |
| Miniopterus natalensis | 291302 | 1016674193 |
| Eptesicus fuscus | 29078 | 641730880 |
| Myotis lucifugus | 59463 | 940768390 |
| Myotis brandtii | 109478 | 946799019 |
| Manis javanica | 9974 | 1048452318 |
| Acinonyx jubatus | 32536 | 961710667 |
| Felis catus | 9685 | 410957476 |
| Panthera pardus | 9691 | 1111079331 |
| Panthera tigris | 74533 | 591321662 |
| Canis lupus | 9615 | 545521723 |
| Ursus maritimus | 29073 | 671001068 |
| Ailuropoda melanoleuca | 9646 | 1126262988 |
| Mustela putorius | 9669 | 859857021 |
| Enhydra lutris | 391180 | 1244101367 |
| Odobenus rosmarus | 9708 | 472346437 |
| Leptonychotes weddellii | 9713 | 585154038 |
| Neomonachus schauinslandi | 29088 | 1212216747 |
| Ceratotherium simum | 73337 | 955478944 |
| Equus asinus | 9793 | 958718360 |
| Equus caballus | 9796 | 19031197 |
| Camelus bactrianus | 9837 | 429534184 |
| Camelus dromedarius | 9838 | 1742992 |
| Lama glama | 9844 | 787034497 |
| Vicugna pacos | 30538 | 560980638 |
| Sus scrofa | 9823 | 55742766 |
| Balaenoptera acutorostrata | 310752 | 594692663 |
| Physeter catodon | 9755 | 593761147 |
| Lipotes vexillifer | 118797 | 602729614 |
| Delphinapterus leucas | 9749 | 1246240428 |
| Tursiops truncatus | 9739 | 470658938 |
| Orcinus orca | 9733 | 466001209 |
| Odocoileus virginianus | 9880 | 1187550739 |
| Cervus nippon | 9863 | 295705 |
| Bubalus bubalis | 89462 | 295701 |
| Bos taurus | 9913 | 1228078 |
| Bison bison | 43346 | 742114810 |
| Bos mutus | 72004 | 440904989 |
| Saiga tatarica | 34875 | 1033241 |
| Pantholops hodgsonii | 59538 | 556777482 |
| Rupicapra rupicapra | 34869 | 1033238 |
| Ovis aries | 9940 | 57164381 |
| Capra hircus | 9925 | 978 |
| Oreamnos americanus | 34873 | 1033235 |
| Naemorhedus goral | 34871 | 1033232 |
| Capricornis swinhoei | 34866 | 1033201 |
| Capricornis sumatraensis | 34865 | 1033203 |
| Capricornis crispus | 9966 | 295703 |
# formatting for latex and export
kappa_casein_gi %>%
mutate_at("species", ~ glue("\\textit{<<.x>>}", .open = "<<", .close = ">>")) %>%
mutate_at("species", ~ fct_recode(.x,
"\\textit{Ictidomys_tridecemlineatus} \\textsuperscript{b}" = "\\textit{Ictidomys_tridecemlineatus}",
"\\textit{Fukomys_damarensis} \\textsuperscript{c}" = "\\textit{Fukomys_damarensis}",
"\\textit{Bos_mutus} \\textsuperscript{h}" = "\\textit{Bos_mutus}",
"\\textit{Carlito_syrichta} \\textsuperscript{e}" = "\\textit{Carlito_syrichta}",
"\\textit{Vicugna_pacos} \\textsuperscript{g}" = "\\textit{Vicugna_pacos}",
"\\textit{Neomonachus_schauinslandi} \\textsuperscript{f}" = "\\textit{Neomonachus_schauinslandi}",
"\\textit{Nannospalax_galili} \\textsuperscript{d}" = "\\textit{Nannospalax_galili}"
)) %>%
mutate_at("species", fct_relabel, underscore_to_space) %>%
mutate_at("gi", ~ ifelse(is.na(.x), "\\textsuperscript{a}", .x)) %>%
rename(
`NCBI Taxon ID` = taxid,
`NCBI GI Identifier` = gi
) %>%
knitr::kable(
format = "latex",
booktabs = TRUE,
linesep = "",
longtable = TRUE,
caption.short = "Kappa-casein GenInfo Identifiers",
label = "gi_species",
caption = "\\textbf{Kappa-casein GenInfo Identifiers.}",
escape = F
) %>%
kable_styling(
latex_options = c(
"striped",
"hold_position",
"full_width",
"condensed",
"repeat_header"
),
full_width = TRUE,
repeat_header_continued = TRUE
) %>%
# group_rows("Monotremes", 1, 2) %>%
# group_rows("Marsupials", 3, 4) %>%
# group_rows("Placentals", 5, 99) %>%
footnote(
alphabet = c(
"First exon was recovered from a BLAST with the elephant sequence", # a - Manatee
"Was ``Spermophilus tridecemlineatus'' in @cite{fritz_2009}", # b - Ictidomys_tridecemlineatus
"Was ``Cryptomys damarensis'' in @cite{fritz_2009}", # c - Fukomys_damarensis
"Used instead of ``Spalax ehrenbergi'' in @cite{fritz_2009}", # d - Nannospalax galili
"Was ``Tarsius syrichta'' in @cite{fritz_2009}''", # e - Carlito_syrichta
"Was ``Monachus schauinslandi\" in @cite{fritz_2009}''", # f - Neomonachus schauinslandi
"Was ``Vicugna vicugna'' in @cite{fritz_2009}", # g - Vicugna_pacos
"Was ``Bos grunniens'' in @cite{fritz_2009}" # h - Bos mutus
),
escape = FALSE
) %>%
str_replace_all("@cite", "\\\\textcite") %>%
save_table(table = ., name = "table_S1")
# formating for html report
kappa.casein.manual.repeats %>%
mutate_at("species", underscore_to_space) %>%
kable(format = "html") %>%
kable_styling()
| species | N_flank_position | C_flank_position | sequence |
|---|---|---|---|
| Saiga tatarica | 137 | 142 | EAIVNT |
| Saiga tatarica | 143 | 148 | EAIVNT |
| Saiga tatarica | 149 | 154 | EAIVNT |
| Bos mutus | 147 | 150 | EASP |
| Bos mutus | 151 | 154 | EASP |
| Otolemur garnettii | 112 | 114 | PTT |
| Otolemur garnettii | 115 | 117 | PTI |
| Otolemur garnettii | 141 | 161 | PETSSVSAVTNTLEAAAVTVT |
| Otolemur garnettii | 162 | 182 | PEASSVSAITNTLEAAAVTVT |
| Otolemur garnettii | 183 | 203 | PEASSVSAVTNTLEAAAVTVT |
| Myotis lucifugus | 95 | 131 | PSLFAIPPKKNQDKAVIPTANTVPADEPTLIPPSEST |
| Myotis lucifugus | 132 | 168 | PPLFAVPPKKNQDKAVIPIVNTVPADEATLFPPSEST |
| Myotis lucifugus | 169 | 205 | PPLFATPPKKNQDKAVIPTINIIPADEPTVILSSEPT |
| Myotis brandtii | 95 | 131 | PSLFAIPPKKNQDKAVIPTANTVPADEPTLIPPSEST |
| Myotis brandtii | 132 | 168 | PPLIAIPPKKNQDKAVIPIVNTVPADEPTLFPPSEST |
| Myotis brandtii | 169 | 205 | PPLIAIPPKKNQDKAVIPTINIIAADEPTVILSSEPT |
| Jaculus jaculus | 101 | 112 | NADPNASAIPSA |
| Jaculus jaculus | 113 | 124 | NAHPDASAIPSA |
| Jaculus jaculus | 125 | 136 | NAHPDASAIPSP |
| Cavia porcellus | 133 | 145 | SAGDTPEVSSQFI |
| Cavia porcellus | 146 | 171 | DTPDTSVLAEEARESPEDTPEISEFI |
| Cavia porcellus | 172 | 198 | NAPDTAVPSEEPRESAEDTPEISSEFI |
| Castor canadensis | 51 | 62 | INNPYMPYPYYV |
| Castor canadensis | 63 | 74 | ISNPYMSYPYYS |
| Dipodomys ordii | 48 | 62 | INSPYMPFPYYA |
| Dipodomys ordii | 63 | 69 | VNN LPYTYST |
| Manis javanica | 33 | 35 | NSL |
| Manis javanica | 36 | 38 | NSS |
| Sus scrofa | 128 | 133 | EPIVNA |
| Sus scrofa | 134 | 139 | EPIVNA |
format_species_ptr_table <- . %>%
str_replace("Dipodomys_ordii", glue("Dipodomys_ordii \\{footnote_marker_symbol(1, 'latex')}")) %>%
fct_relabel(underscore_to_space) %>%
map_chr(~ glue("\\textit{<<.x>>}", .open = "<<", .close = ">>")) # italicized
format_sequence_ptr_table <- . %>%
map_chr(~ str_remove_all(.x, "[ -]")) %>% # remove gaps
map_chr(~ glue("\\texttt{<<.x>>}", .open = "<<", .close = ">>")) # monospaced
# formatting for latex and export
kappa.casein.manual.repeats %>%
order_species_tree() %>%
mutate_at("species", format_species_ptr_table) %>%
mutate_at("sequence", format_sequence_ptr_table) %>%
rename(N = N_flank_position, C = C_flank_position) %>%
knitr::kable(
format = "latex",
booktabs = TRUE,
linesep = "",
longtable = TRUE,
caption.short = "Protein tandem repeats found in kappa-casein sequences.",
label = "supp_ptr",
caption = "\\textbf{Protein tandem repeats found in kappa-casein sequences.}
Positions are given for the extremities of each tandem repeat unit
in the mature sequence.",
escape = FALSE
) %>%
add_header_above(c(" ", "position" = 2, " ")) %>%
kable_styling(
latex_options = c(
"striped",
"hold_position",
"full_width",
"condensed"
),
full_width = TRUE,
repeat_header_continued = TRUE
) %>%
footnote(symbol = c("likely highly degenerate repeat")) %>%
column_spec(1, width = "1.5in") %>%
column_spec(2, width = "0.5in") %>%
column_spec(3, width = "0.5in") %>%
save_table(table = ., name = "table_S2")
# formating for html report
all_predictions_metrics %>%
mutate_if(is.numeric, round, digits = 2) %>%
kable(format = "html") %>%
kable_styling()
| method | sensivity | specificity | precision | accuracy | MCC |
|---|---|---|---|---|---|
| glycomine | 0.75 | 0.95 | 0.86 | 0.89 | 0.72 |
| glycopred | 0.94 | 0.32 | 0.38 | 0.51 | 0.28 |
| O-GlcNAcPRED-II | 0.56 | 0.62 | 0.39 | 0.60 | 0.17 |
| netoglyc | 0.56 | 0.54 | 0.35 | 0.55 | 0.09 |
# formatting for latex and export
all_predictions_metrics %>%
mutate_if(is.numeric, round, digits = 2) %>%
mutate(method = fct_recode(method,
"GlycoMine" = "glycomine",
"O-GlcNAcPRED-II" = "O-GlcNAcPRED-II",
"NetOClyc4.0" = "netoglyc",
"GlycoPred" = "glycopred"
)) %>%
kable(
format = "latex",
longtable = TRUE,
booktabs = TRUE,
caption.short = "Prediction of O-glycosylation in kappa-casein",
label = "supp_glyco_pred_methods",
caption = "\\textbf{Prediction of O-glycosylation in kappa-casein
with GlycoMine \\autocite{li_2015a};
GlycoPred \\autocite{hamby_2008};
O-GlcNAcPRED-II \\autocite{jia_2018};
and NetOGlyc4.0 \\autocite{steentoft_2013}.}",
escape = F
) %>%
kable_styling(latex_options = c("striped", "hold_position", "full_width")) %>%
save_table(table = ., name = "table_S3")
## ─ Session info ──────────────────────────────────────────────────────────────────────────────
## setting value
## version R version 3.4.4 (2018-03-15)
## os Linux Mint 18.3
## system x86_64, linux-gnu
## ui X11
## language en_IE:en
## collate en_IE.UTF-8
## ctype en_IE.UTF-8
## tz Europe/Dublin
## date 2019-05-21
##
## ─ Packages ──────────────────────────────────────────────────────────────────────────────────
## package * version date lib source
## acepack 1.4.1 2016-10-29 [1] CRAN (R 3.4.2)
## ade4 1.7-13 2018-08-31 [1] CRAN (R 3.4.4)
## ape * 5.3 2019-03-17 [1] CRAN (R 3.4.4)
## aphid 1.3.3 2019-05-08 [1] CRAN (R 3.4.4)
## askpass 1.1 2019-01-13 [1] CRAN (R 3.4.4)
## assertthat * 0.2.1 2019-03-21 [1] CRAN (R 3.4.4)
## backports 1.1.4 2019-04-10 [1] CRAN (R 3.4.4)
## badger 0.0.4 2019-01-08 [1] CRAN (R 3.4.4)
## base64enc 0.1-3 2015-07-28 [1] CRAN (R 3.4.2)
## BiocGenerics 0.24.0 2018-04-29 [1] Bioconductor
## Biostrings 2.46.0 2018-04-29 [1] Bioconductor
## broom 0.5.2 2019-04-07 [1] CRAN (R 3.4.4)
## cellranger 1.1.0 2016-07-27 [1] CRAN (R 3.4.3)
## checkmate 1.9.3 2019-05-03 [1] CRAN (R 3.4.4)
## cleaver 1.21.4 2019-01-25 [1] Github (sgibb/cleaver@1ccaedc)
## cli 1.1.0 2019-03-19 [1] CRAN (R 3.4.4)
## cluster 2.0.7-1 2018-04-09 [4] CRAN (R 3.4.4)
## codetools 0.2-15 2016-10-05 [4] CRAN (R 3.3.1)
## colorspace 1.4-1 2019-03-18 [1] CRAN (R 3.4.4)
## crayon 1.3.4 2017-09-16 [1] CRAN (R 3.4.4)
## data.table 1.12.2 2019-04-07 [1] CRAN (R 3.4.4)
## digest 0.6.18 2018-10-10 [1] CRAN (R 3.4.4)
## dlstats 0.1.0 2017-08-07 [1] CRAN (R 3.4.4)
## dplyr * 0.8.1 2019-05-14 [1] CRAN (R 3.4.4)
## ellipsis 0.1.0 2019-02-19 [1] CRAN (R 3.4.4)
## evaluate 0.13 2019-02-12 [1] CRAN (R 3.4.4)
## farver * 1.1.0 2018-11-20 [1] CRAN (R 3.4.4)
## fastmatch 1.1-0 2017-01-28 [1] CRAN (R 3.4.2)
## forcats * 0.4.0 2019-02-17 [1] CRAN (R 3.4.4)
## foreign 0.8-71 2018-07-20 [1] CRAN (R 3.4.4)
## formatR 1.6 2019-03-05 [1] CRAN (R 3.4.4)
## Formula * 1.2-3 2018-05-03 [1] CRAN (R 3.4.4)
## generics 0.0.2 2018-11-29 [1] CRAN (R 3.4.4)
## geoscale * 2.0 2015-05-14 [1] CRAN (R 3.4.4)
## ggforce * 0.2.2 2019-04-23 [1] CRAN (R 3.4.4)
## ggplot2 * 3.1.1 2019-04-07 [1] CRAN (R 3.4.4)
## ggpmisc * 0.3.1 2019-04-02 [1] CRAN (R 3.4.4)
## ggstance * 0.3.1 2018-07-20 [1] CRAN (R 3.4.4)
## ggthemes * 4.2.0 2019-05-13 [1] CRAN (R 3.4.4)
## ggtree * 1.17.1 2019-05-19 [1] Github (GuangchuangYu/ggtree@c00aac1)
## glue * 1.3.1 2019-03-12 [1] CRAN (R 3.4.4)
## gridExtra 2.3 2017-09-09 [1] CRAN (R 3.4.2)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 3.4.4)
## haven 2.1.0 2019-02-19 [1] CRAN (R 3.4.4)
## here * 0.1 2017-05-28 [1] CRAN (R 3.4.3)
## highr 0.8 2019-03-20 [1] CRAN (R 3.4.4)
## Hmisc * 4.2-0 2019-01-26 [1] CRAN (R 3.4.4)
## hms 0.4.2 2018-03-10 [1] CRAN (R 3.4.3)
## htmlTable 1.13.1 2019-01-07 [1] CRAN (R 3.4.4)
## htmltools 0.3.6 2017-04-28 [1] CRAN (R 3.4.2)
## htmlwidgets 1.3 2018-09-30 [1] CRAN (R 3.4.4)
## httr 1.4.0 2018-12-11 [1] CRAN (R 3.4.4)
## igraph 1.2.4.1 2019-04-22 [1] CRAN (R 3.4.4)
## IRanges 2.12.0 2018-04-29 [1] Bioconductor
## jsonlite 1.6 2018-12-07 [1] CRAN (R 3.4.4)
## kableExtra * 1.1.0 2019-03-16 [1] CRAN (R 3.4.4)
## kmer 1.1.1 2019-03-15 [1] CRAN (R 3.4.4)
## knitr * 1.23 2019-05-18 [1] CRAN (R 3.4.4)
## labeling 0.3 2014-08-23 [1] CRAN (R 3.4.2)
## lattice * 0.20-38 2018-11-04 [1] CRAN (R 3.4.4)
## latticeExtra 0.6-28 2016-02-09 [1] CRAN (R 3.4.2)
## lazyeval 0.2.2 2019-03-15 [1] CRAN (R 3.4.4)
## lubridate 1.7.4 2018-04-11 [1] CRAN (R 3.4.4)
## magrittr * 1.5 2014-11-22 [1] CRAN (R 3.4.3)
## MASS 7.3-51.4 2019-04-26 [1] CRAN (R 3.4.4)
## Matrix 1.2-14 2018-04-09 [4] CRAN (R 3.4.4)
## modelr 0.1.4 2019-02-18 [1] CRAN (R 3.4.4)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 3.4.4)
## nlme 3.1-137 2018-04-07 [4] CRAN (R 3.4.4)
## nnet 7.3-12 2016-02-02 [4] CRAN (R 3.4.0)
## openssl 1.3 2019-03-22 [1] CRAN (R 3.4.4)
## patchwork * 0.0.1 2018-09-24 [1] Github (thomasp85/patchwork@fd7958b)
## Peptides * 2.4 2018-06-08 [1] CRAN (R 3.4.4)
## phangorn 2.6.0 2019-05-19 [1] Github (KlausVigo/phangorn@3eadeaf)
## phylogram 2.1.0 2018-06-25 [1] CRAN (R 3.4.4)
## pillar 1.4.0 2019-05-11 [1] CRAN (R 3.4.4)
## pkgconfig 2.0.2 2018-08-16 [1] CRAN (R 3.4.4)
## plyr 1.8.4 2016-06-08 [1] CRAN (R 3.4.2)
## polyclip 1.10-0 2019-03-14 [1] CRAN (R 3.4.4)
## ProjectTemplate * 0.9.0 2019-02-26 [1] CRAN (R 3.4.4)
## purrr * 0.3.2 2019-03-15 [1] CRAN (R 3.4.4)
## quadprog 1.5-7 2019-05-06 [1] CRAN (R 3.4.4)
## R6 2.4.0 2019-02-14 [1] CRAN (R 3.4.4)
## RColorBrewer 1.1-2 2014-12-07 [1] CRAN (R 3.4.3)
## Rcpp 1.0.1 2019-03-17 [1] CRAN (R 3.4.4)
## readr * 1.3.1 2018-12-21 [1] CRAN (R 3.4.4)
## readxl 1.3.1 2019-03-13 [1] CRAN (R 3.4.4)
## rlang 0.3.4 2019-04-07 [1] CRAN (R 3.4.4)
## rmarkdown 1.12 2019-03-14 [1] CRAN (R 3.4.4)
## rpart 4.1-15 2019-04-12 [4] CRAN (R 3.4.4)
## rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.4.3)
## rstudioapi 0.10 2019-03-19 [1] CRAN (R 3.4.4)
## rvcheck 0.1.3 2018-12-06 [1] CRAN (R 3.4.4)
## rvest * 0.3.4 2019-05-15 [1] CRAN (R 3.4.4)
## S4Vectors 0.16.0 2018-04-29 [1] Bioconductor
## scales 1.0.0 2018-08-09 [1] CRAN (R 3.4.4)
## seqinr 3.4-5 2017-08-01 [1] CRAN (R 3.4.3)
## sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.4.4)
## stringi 1.4.3 2019-03-12 [1] CRAN (R 3.4.4)
## stringr * 1.4.0 2019-02-10 [1] CRAN (R 3.4.4)
## survival * 2.44-1.1 2019-04-01 [4] CRAN (R 3.4.4)
## tibble * 2.1.1 2019-03-16 [1] CRAN (R 3.4.4)
## tidyr * 0.8.3 2019-03-01 [1] CRAN (R 3.4.4)
## tidyselect 0.2.5 2018-10-11 [1] CRAN (R 3.4.4)
## tidytree 0.2.4 2019-02-27 [1] CRAN (R 3.4.4)
## tidyverse * 1.2.1 2017-11-14 [1] CRAN (R 3.4.4)
## treeio 1.9.0 2019-05-19 [1] Github (GuangchuangYu/treeio@12b7af4)
## tweenr 1.0.1 2018-12-14 [1] CRAN (R 3.4.4)
## viridisLite 0.3.0 2018-02-01 [1] CRAN (R 3.4.3)
## webshot 0.5.1 2018-09-28 [1] CRAN (R 3.4.4)
## withr * 2.1.2.9000 2019-05-19 [1] Github (jimhester/withr@79c5e36)
## xfun 0.7 2019-05-14 [1] CRAN (R 3.4.4)
## XML * 3.99-0 2018-03-06 [1] local
## xml2 * 1.2.0 2018-01-24 [1] CRAN (R 3.4.3)
## XVector 0.18.0 2018-04-29 [1] Bioconductor
## yaml 2.2.0 2018-07-25 [1] CRAN (R 3.4.4)
## zlibbioc 1.24.0 2018-04-29 [1] Bioconductor
##
## [1] /home/jean/R/x86_64-pc-linux-gnu-library/3.4
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library